Introduction

This markdown provides comparison between three meQTL analysis. All meQTL analysis were done using the R package MatrixEQTL.

For all analysis, the same DNAm data were used which underwent following QC steps:

DNAm QC

The genotype data were different for each analysis:

  1. The SNP data were used where the uncertain genotype was replaced with the most likely genotype

  2. The uncertain genotypes were left as they are

  3. SNP probabilities, i.e. dosages, were used

For all three analysis, the imputed QC SNP data (no LD pruning, the MHC regions are included) were used. The data includes 3’957’338 SNPs.

meQTL analysis: using SNPs’ probabilities

All meQTLs passed the significance threshold of FDR = 0.05.

Significant Hits, FDR < 0.05

x %>%
    ggplot(aes(y = cnt, x = Var2, fill = as.factor(as.numeric(Var2)))) + geom_col() + geom_text(aes(label = comma(cnt,
    accuracy = 1L), y = cnt), stat = "identity", vjust = -0.2, size = 2.5) + scale_y_continuous(labels = scientific) +
    # scale_fill_viridis(discrete = TRUE, alpha = 0.9, option = 'C') +
labs(title = " ", y = "Count", x = " ") + facet_wrap(~Var1, ncol = 3) + theme(legend.position = "none",
    legend.title = element_blank(), panel.grid.major = element_blank(), panel.background = element_blank(),
    plot.title = element_text(size = 10), axis.title = element_text(size = 8), axis.title.x = element_blank()) +
    scale_fill_manual(values = cbPalette[c(1, 2)])

Scatter plot

fdr.thr <- 0.05
plot.title <- paste0("Significant at FDR <= ", fdr.thr, " meQTLs.", "\nResult of MatrixEQTL")
GetScatterPlot2(meqtl.all.prob.df, plot.title = plot.title, cbPalette = cbPalette)

Upset plots

meQTL level

meqtls.meqtls <- list(delta = meqtl.all.prob.df[treatment == "delta", meQTL_ID], veh = meqtl.all.prob.df[treatment ==
    "baseline", meQTL_ID])

intersect.meqtls <- intersect(meqtls.meqtls$delta, meqtls.meqtls$veh)
perc.olap.delta.with.veh <- scales::percent(length(intersect.meqtls)/length(meqtls.meqtls$delta), accuracy = 0.1)
perc.olap.veh.with.delta <- scales::percent(length(intersect.meqtls)/length(meqtls.meqtls$veh), accuracy = 0.1)

plot.title <- "Number of intersections between delta and baseline meQTLs"

ggVennDiagram::ggVennDiagram(meqtls.meqtls, category.names = c(paste0("GR-response, ", perc.olap.delta.with.veh),
    paste0("Baseline, ", perc.olap.veh.with.delta)), label_alpha = 0.7, edge_size = 0, set_geom = "text",
    set_color = "black", label = "count") + theme(legend.position = "none", legend.title = element_blank(),
    panel.background = element_blank(), plot.title = element_text(size = 10, face = "italic")) + labs(x = "",
    y = "", title = plot.title) + scale_color_manual(values = cbPalette) + scale_fill_gradient(low = alpha(cbPalette[2],
    0.5), high = alpha(cbPalette[1], 0.5))

SNP level

meqtls.snps <- list(delta = meqtl.all.prob.df[treatment == "delta", SNP], veh = meqtl.all.prob.df[treatment ==
    "baseline", SNP])

intersect <- intersect(meqtls.snps$delta, meqtls.snps$veh)
perc.olap.delta.with.veh <- scales::percent(length(intersect)/length(meqtls.snps$delta), accuracy = 0.1)
perc.olap.veh.with.delta <- scales::percent(length(intersect)/length(meqtls.snps$veh), accuracy = 0.1)

plot.title <- "Number of intersections between delta and baseline meQTL SNPs"

ggVennDiagram::ggVennDiagram(meqtls.snps, category.names = c(paste0("GR-response, ", perc.olap.delta.with.veh),
    paste0("Baseline, ", perc.olap.veh.with.delta)), label_alpha = 0.7, edge_size = 0, set_geom = "text",
    set_color = "black", label = "count") + theme(legend.position = "none", legend.title = element_blank(),
    panel.background = element_blank(), plot.title = element_text(size = 10, face = "italic")) + labs(x = "",
    y = "", title = plot.title) + scale_color_manual(values = cbPalette) + scale_fill_gradient(low = alpha(cbPalette[2],
    0.5), high = alpha(cbPalette[1], 0.5))

CpG level

meqtls.cpg <- list(delta = meqtl.all.prob.df[treatment == "delta", CpG_ID], veh = meqtl.all.prob.df[treatment ==
    "baseline", CpG_ID])

intersect <- intersect(meqtls.cpg$delta, meqtls.cpg$veh)
perc.olap.delta.with.veh <- scales::percent(length(intersect)/length(meqtls.cpg$delta), accuracy = 0.1)
perc.olap.veh.with.delta <- scales::percent(length(intersect)/length(meqtls.cpg$veh), accuracy = 0.1)

plot.title <- "Number of intersections between delta and baseline meQTL CpGs"

ggVennDiagram::ggVennDiagram(meqtls.cpg, category.names = c(paste0("GR-response, ", perc.olap.delta.with.veh),
    paste0("Baseline, ", perc.olap.veh.with.delta)), label_alpha = 0.7, edge_size = 0, set_geom = "text",
    set_color = "black", label = "count") + theme(legend.position = "none", legend.title = element_blank(),
    panel.background = element_blank(), plot.title = element_text(size = 10, face = "italic")) + labs(x = "",
    y = "", title = plot.title) + scale_color_manual(values = cbPalette) + scale_fill_gradient(low = alpha(cbPalette[2],
    0.5), high = alpha(cbPalette[1], 0.5))

meQTLs analysis: using SNPs with missingness

Significant Hits, FDR < 0.05

x %>%
    ggplot(aes(y = cnt, x = Var2, fill = as.factor(as.numeric(Var2)))) + geom_col() + geom_text(aes(label = comma(cnt,
    accuracy = 1L), y = cnt), stat = "identity", vjust = -0.2, size = 2.5) + scale_y_continuous(labels = scientific) +
    # scale_fill_viridis(discrete = TRUE, alpha = 0.9, option = 'C') +
labs(title = " ", y = "Count", x = " ") + facet_wrap(~Var1, ncol = 3) + theme(legend.position = "none",
    legend.title = element_blank(), panel.grid.major = element_blank(), panel.background = element_blank(),
    plot.title = element_text(size = 10), axis.title = element_text(size = 8), axis.title.x = element_blank()) +
    scale_fill_manual(values = cbPalette[c(1, 2)])

meQTLs analysis: using the most likely genotype (mode)

Significant Hits, FDR < 0.05

x %>%
    ggplot(aes(y = cnt, x = Var2, fill = as.factor(as.numeric(Var2)))) + geom_col() + geom_text(aes(label = comma(cnt,
    accuracy = 1L), y = cnt), stat = "identity", vjust = -0.2, size = 2.5) + scale_y_continuous(labels = scientific) +
    # scale_fill_viridis(discrete = TRUE, alpha = 0.9, option = 'C') +
labs(title = " ", y = "Count", x = " ") + facet_wrap(~Var1, ncol = 3) + theme(legend.position = "none",
    legend.title = element_blank(), panel.grid.major = element_blank(), panel.background = element_blank(),
    plot.title = element_text(size = 10), axis.title = element_text(size = 8), axis.title.x = element_blank()) +
    scale_fill_manual(values = cbPalette[c(1, 2)])

Overlap between meQTLs from different approaches

Probabilities vs missigness

meqtls.delta <- list(prob = meqtl.all.prob.df[treatment == "delta", meQTL_ID], miss = meqtl.all.na.df[treatment ==
    "delta", meQTL_ID])

meqtls.veh <- list(prob = meqtl.all.prob.df[treatment == "baseline", meQTL_ID], miss = meqtl.all.na.df[treatment ==
    "baseline", meQTL_ID])

delta GR-induced

euler.tbl <- euler(meqtls.delta)
intersect <- euler.tbl$original.values["prob&miss"]

perc.olap.prob.with.miss <- scales::percent(intersect/length(meqtls.delta$prob), accuracy = 0.1)
perc.olap.miss.with.prob <- scales::percent(intersect/length(meqtls.delta$miss), accuracy = 0.1)

plot(euler.tbl, quantities = list(type = c("counts", "percent")), fills = c("lightgrey", "red", "brown"),
    labels = c("probabilities", "missigness"))

baseline

euler.tbl <- euler(meqtls.veh)
intersect <- euler.tbl$original.values["prob&miss"]

perc.olap.prob.with.miss <- scales::percent(intersect/length(meqtls.veh$prob), accuracy = 0.1)
perc.olap.miss.with.prob <- scales::percent(intersect/length(meqtls.veh$miss), accuracy = 0.1)

plot(euler.tbl, quantities = list(type = c("counts", "percent")), fills = c("lightgrey", "brown"), labels = c("probabilities",
    "missigness"))

Probabilities vs most likely genotype

meqtls.delta <- list(prob = meqtl.all.prob.df[treatment == "delta", meQTL_ID], mode = meqtl.all.mode.df[treatment ==
    "delta", meQTL_ID])

meqtls.veh <- list(prob = meqtl.all.prob.df[treatment == "baseline", meQTL_ID], mode = meqtl.all.mode.df[treatment ==
    "baseline", meQTL_ID])

delta GR-induced

euler.tbl <- euler(meqtls.delta)

plot(euler.tbl, quantities = list(type = c("counts", "percent")), fills = c("lightgrey", "brown"), labels = c("probabilities",
    " most likely genotype"))

Case visualisation of delta GR-induced meQTL results

meqtls.delta <- list(prob = meqtl.all.prob.df[treatment == "delta", meQTL_ID], miss = meqtl.all.na.df[treatment ==
    "delta", meQTL_ID], mode = meqtl.all.mode.df[treatment == "delta", meQTL_ID])

The example of meQTL which is significant only for “the most likely genotype” case

unique.mode.meqtl <- setdiff(meqtls.delta$mode, union(meqtls.delta$prob, meqtls.delta$miss))
unique.mode.meqtl.df <- meqtl.all.mode.df[treatment == "delta"][meQTL_ID %in% unique.mode.meqtl]
selected.meqtl <- unique.mode.meqtl.df[1]

kable(selected.meqtl %>%
    select(SNP, CpG_ID, FC = beta, `t-stat`, `p-value`, FDR = fdr, Treatment = treatment) %>%
    mutate_if(is.numeric, funs(as.character(signif(., 3)))))
SNP CpG_ID FC t-stat p-value FDR Treatment
rs9961090 cg20071505 0.04 9.99 3.19e-19 2.98e-13 delta
ProcessGetBoxPlot(methyl.beta.veh.df, methyl.beta.dex.df, snp.df, selected.meqtl, fdr.thr = 0.05)

The example of meQTL which is significant only for the “dosage” case

unique.meqtl <- setdiff(meqtls.delta$prob, union(meqtls.delta$mode, meqtls.delta$miss))
unique.meqtl.df <- meqtl.all.prob.df[treatment == "delta"][meQTL_ID %in% unique.meqtl]
selected.meqtl <- unique.meqtl.df[1]

kable(selected.meqtl %>%
    select(SNP, CpG_ID, FC = beta, `t-stat`, `p-value`, FDR = fdr, Treatment = treatment) %>%
    mutate_if(is.numeric, funs(as.character(signif(., 3)))))
SNP CpG_ID FC t-stat p-value FDR Treatment
rs11655681 cg17105886 -0.0407 -5.75 3.35e-08 0.00231 delta
ProcessGetBoxPlot(methyl.beta.veh.df, methyl.beta.dex.df, snp.df, selected.meqtl, fdr.thr = 0.05)

The example of meQTL which is significant for all case

# meqtls.delta <- list(prob = meqtl.all.prob.df[treatment == 'delta', meQTL_ID], miss =
# meqtl.all.na.df[treatment == 'delta', meQTL_ID], mode = meqtl.all.mode.df[treatment == 'delta',
# meQTL_ID])

meqtl.lst <- intersect(intersect(meqtls.delta$mode, meqtls.delta$prob), meqtls.delta$miss)

The most signififcant from the analysis where uncertain genotypes were replaced with the most likely one

meqtl.df <- meqtl.all.mode.df[treatment == "delta"][meQTL_ID %in% meqtl.lst]
selected.meqtl <- meqtl.df[1]

kable(selected.meqtl %>%
    select(SNP, CpG_ID, FC = beta, `t-stat`, `p-value`, FDR = fdr, Treatment = treatment) %>%
    mutate_if(is.numeric, funs(as.character(signif(., 3)))))
SNP CpG_ID FC t-stat p-value FDR Treatment
rs9406094 cg12777182 0.0694 16.6 3.06e-39 3.01e-30 delta
selected.meqtl <- meqtl.df[1]

ProcessGetBoxPlot(methyl.beta.veh.df, methyl.beta.dex.df, snp.df, selected.meqtl, fdr.thr = 0.05)

The most signififcant from the analysis where dosages were used

meqtl.df <- meqtl.all.prob.df[treatment == "delta"][meQTL_ID %in% meqtl.lst]
selected.meqtl <- meqtl.df[1]

kable(selected.meqtl %>%
    select(SNP, CpG_ID, FC = beta, `t-stat`, `p-value`, FDR = fdr, Treatment = treatment) %>%
    mutate_if(is.numeric, funs(as.character(signif(., 3)))))
SNP CpG_ID FC t-stat p-value FDR Treatment
rs12310416 cg07195891 -0.0718 -16.7 2.35e-39 1.84e-30 delta
selected.meqtl <- meqtl.df[1]

ProcessGetBoxPlot(methyl.beta.veh.df, methyl.beta.dex.df, snp.df, selected.meqtl, fdr.thr = 0.05)

The most signififcant from the analysis where genotype missingness is present

meqtl.df <- meqtl.all.na.df[treatment == "delta"][meQTL_ID %in% meqtl.lst]
selected.meqtl <- meqtl.df[1]

kable(selected.meqtl %>%
    select(SNP, CpG_ID, FC = beta, `t-stat`, `p-value`, FDR = fdr, Treatment = treatment) %>%
    mutate_if(is.numeric, funs(as.character(signif(., 3)))))
SNP CpG_ID FC t-stat p-value FDR Treatment
rs12310416 cg07195891 0.0718 16.7 2.35e-39 3.21e-30 delta
selected.meqtl <- meqtl.df[1]

ProcessGetBoxPlot(methyl.beta.veh.df, methyl.beta.dex.df, snp.df, selected.meqtl, fdr.thr = 0.05)